** Set directories
** The working directory matters: the final save in this script uses a relative path
cd "/work/sf/internal/l1werp20.sf.frb.org/Shared/Olivia/Rob/Covid_Unemployment_Paths"
** Local macros: folder with the matched database, and folder with the state-level files
local input "/l1-res/shared/micro/projects/unrestricted/CPS_2014_Database/Stata Databases"
local datadir "/l1-res/shared/micro/RobV/UI/UI_MonthlyCPS/data"

** Read-in data
use "`input'/CPS_Match.dta", clear

** Monthly date variable (used below as a merge key for the state series)
** Clear any leftover ym AND any existing date, so the generate below cannot
** stop with "variable date already defined"
capture drop ym
capture drop date
gen date = ym(year, month)
format date %tm

** Keep January through July 2020 only
keep if year == 2020 & inrange(month, 1, 7)

** FORM ALL MATCH/FLOW VARIABLES PRIOR TO DATA RESTRICTIONS
** EXCEPTION IS BAD MATCH FLAG; DON'T WANT TO USE THOSE OBS AT ALL (SINGLE OR DOUBLE MATCH)

** Bad-match and missing/allocation flags.
** Each record is compared with the same id's record at the next mis (f. operator).
egen double id2 = group(id)
xtset id2 mis

** Sex and race: flag any change; defined only where matchf==1
foreach v in sex race {
  gen byte fl_`v'_cps = (`v' != f.`v') if matchf==1
}
** Age: flag unless the change to the next record is 0, 1 or 2
gen byte fl_age_cps = !inlist(f.d.age, 0, 1, 2) if matchf==1

** Education: flag when the next record's grdatn falls outside the window
** allowed for the current grdatn code
gen byte fl_educ_cps = 0
  ** Two-sided windows, listed as "current-code lowest-allowed highest-allowed"
foreach rule in "31 31 32" "32 32 33" "33 33 34" "34 34 35" "35 35 36" "36 36 37" ///
                "37 37 39" "38 38 42" "39 39 42" "40 40 43" "41 41 43" "42 41 43" {
  local cur : word 1 of `rule'
  local lo  : word 2 of `rule'
  local hi  : word 3 of `rule'
  replace fl_educ_cps = 1 if grdatn==`cur' & !inrange(f.grdatn, `lo', `hi')
}
  ** One-sided rules: only a move below the floor is flagged
  replace fl_educ_cps = 1 if grdatn==43 & f.grdatn<43
  replace fl_educ_cps = 1 if inlist(grdatn, 44, 45, 46) & f.grdatn<44
  ** Unmatched records are never flagged; a missing code on either side gives a missing flag
  replace fl_educ_cps = 0 if matchf==0
  replace fl_educ_cps = . if grdatn==. | f.grdatn==.

** Overall flag: 1 if any of the four component flags equals 1
gen fl_bad_cps = inlist(1, fl_sex_cps, fl_race_cps, fl_age_cps, fl_educ_cps)
bysort year: summarize fl_bad_cps

** Create forward double match (limits to mis 1,2,5,6):
** this record is matched forward and so is the next one
xtset id2 mis
gen byte match2f = (f.matchf==1 & matchf==1)
label variable match2f "=1 if match next 2 months"

/* GROUP ID THAT TAKES ACCOUNT OF EARLY V LATE:
   one panel unit per id and early/late block, so lag and lead
   operators used after this point stay within a block */
gen byte early = mis<=4
label variable early "=1 if mis 1-4"
egen double gid = group(id early)
xtset gid mis

/* CREATE LFSTAT VARIABLE */
** Single status code built from E, U and N (presumably 0/1 indicators -- confirm):
** 1 = Emp, 2 = Unemp, 3 = NILF
gen byte lfstat=E+2*U+3*N
label var lfstat "labor force status, raw"
capture label drop lfstat
label define lfstat 1 "Emp" 2 "Unemp" 3 "NILF"
label values lfstat lfstat
tab lfstat,miss
** Missing lfstat also satisfies lfstat>3 (missing sorts above every number), so it is dropped too
drop if lfstat<1 | lfstat>3 //HOW MANY OBSERVATIONS ARE DROPPED HERE?
tab lfstat,miss
  /* RV CHANGE HERE (IN EARLIER CODE):  ADDED EXPLICIT MATCH2F RESTRICTION, SINCE LF STATUS 
  CAN BE NONMISSING DESPITE LACK OF DOUBLE MATCH */
gen byte m2f=f.lfstat<. & f2.lfstat<. & match2f==1
label var m2f "=1 if match forward 2"
tab m2f

/* ADJUSTED STATUS: a single N or E month between two U months is recoded to U */
gen byte lfstat2=lfstat
label var lfstat2 "labor force status, adjusted"
label values lfstat2 lfstat
/* CHANGE UNU TO UUU */
replace lfstat2=2 if l.lfstat2==2 & f.lfstat2==2 & lfstat2==3
/* CHANGE UEU TO UUU */
replace lfstat2=2 if l.lfstat2==2 & f.lfstat2==2 & lfstat2==1

/* CREATE NEW EXIT VARIABLES */
** Unadjusted flags use lfstat, adjusted flags use lfstat2
gen byte exit=lfstat==2 & f.lfstat!=2
gen byte exit2=lfstat2==2 & f.lfstat2!=2
label var exit "unadjusted exit from U"
label var exit2 "adjusted exit from U"
gen byte exit_emp=lfstat==2 & f.lfstat==1
gen byte exit2_emp=lfstat2==2 & f.lfstat2==1
label var exit_emp "unadjusted U-E"
label var exit2_emp "adjusted U-E"
gen byte exit_nilf=lfstat==2 & f.lfstat==3
gen byte exit2_nilf=lfstat2==2 & f.lfstat2==3
label var exit_nilf "unadjusted U-N"
label var exit2_nilf "adjusted U-N"

** When there is no next-mis record the forward status is missing, and "missing != 2"
** is true. Without this step every unemployed record lacking a forward observation
** is coded exit=1 while exit_emp and exit_nilf are 0. The outcome is unknown for
** those records, so all exit flags are set to missing there.
** Records that are not unemployed keep their 0.
** A nonmissing forward status does not by itself imply a valid match (see the RV
** note above), so match restrictions are still needed when these flags are used.
foreach v in exit exit_emp exit_nilf {
  replace `v' = . if lfstat==2 & f.lfstat>=.
}
foreach v in exit2 exit2_emp exit2_nilf {
  replace `v' = . if lfstat2==2 & f.lfstat2>=.
}

** Form duration
** Copy udur, turning negative values into missing
gen duration = cond(udur<0, ., udur)
  label variable duration "unemp duration (with recodes)"

** FILL IN VALUES BASED ON UNU AND UEU RECODES
** A month recoded to U has no value of its own; where the previous month
** (adjusted status) is also U, the value is taken from that previous month.

** Duration: previous month's value plus 4
gen duration2 = duration
  replace duration2 = 4 + l.duration2 if lfstat2==2 & l.lfstat2==2 & duration2==.

** FORM ELIG AND MAJIND VARIABLES HERE
xtset gid mis

** Eligible flag: 1 when utype is 1 or 2, 0 for other values, missing when utype is missing
gen elig = (utype==1 | utype==2) if utype!=.

** elig2: elig with the previous month's value carried into recoded months
gen elig2 = elig
  replace elig2 = l.elig2 if lfstat2==2 & l.lfstat2==2 & elig2==.

** majind2: same carry-forward for major industry
** RV: fix recode of majind2 here; missing value is -1, not "."
gen majind = indmaj3
gen majind2 = majind
  replace majind2 = l.majind2 if lfstat2==2 & l.lfstat2==2 & majind2<0

/* MONTHLY DURATION (ROUNDED) */
** duration2 divided by 52/12, rounded to the nearest whole number.
** durmon, durmoncat and the durmoncat value label are all dropped again
** further down in this script, before the floor-based versions are built.
gen byte durmon = round(duration2/(52/12))
label variable durmon "unemployment duration in months (recoded)"

/* MONTHLY DURATION CATEGORIES */
** Values above 6 are grouped into three bands: 7-9, 10-12 and 13-48
gen byte durmoncat = durmon
label variable durmoncat "monthly unemp dur categories (recoded)"
recode durmoncat (7/9=7) (10/12=8) (13/48=9)
label define durmoncat 7 "7-9" 8 "10-12" 9 "13-48"
label values durmoncat durmoncat

** Merge in state dlne and urate data
** Need to match state codes for 2014 forward

** Step 1: set the working data aside and write a fips-sorted copy of the state crosswalk
preserve
use "`datadir'/state-fips.dta", clear
sort fips
save "`datadir'/fips_match", replace
restore

** Step 2: attach the crosswalk to the working data by fips
rename stfips fips
sort fips
** Drop observations from using only.
** With -update-, a matched row is coded 4 (match_update) or 5 (match_conflict)
** instead of 3 whenever a variable present in both files was filled in or overwritten.
** Codes 4 and 5 are listed explicitly so that no matched master row is discarded.
merge m:1 fips using "`datadir'/fips_match", update replace keep(1 3 4 5) nogen

** Step 3: build the state-level series, keyed on state and monthly date
preserve
** Now form emp data for matching
/*note: original dataset created in C:\data\Haver-Stata\programs*/
** NEED TO LAG BACK ONE MONTH BEFORE MERGING, TO CONFORM TO MATCH FORWARD MONTH IN CPS
use "`datadir'/emp_urate_lf_state", clear
gen double date=ym(year,month)
  ** LAG HERE (one month): each state row is filed under the month before its own
  replace date=date-1
format date %tm
sort state date
keep state st statestr fips date d*lne urate emp emp_c epop pop lf lfp
save "`datadir'/st_e", replace
restore

** Step 4: attach the state series; unmatched using rows are not kept
sort state date
merge m:1 state date using "`datadir'/st_e", keep(1 3) nogen

** Form some variables (from old ext06 and other old programs)

**EDUCATION CATEGORIES (grad degree is 18+ years schooling prior to 1992; same as Lindley/Machin, others)
** Five bands of grdatn, assigned only for year>1991.
** The top band is bounded with grdatn<. because a missing grdatn compares as larger
** than any number; an open-ended "grdatn>=44" would code missing education as
** "Graduate degree". Missing grdatn now leaves educ missing.
gen educ = 1 if inrange(grdatn, 31, 38) & year>1991
  replace educ = 2 if grdatn==39 & year>1991
  replace educ = 3 if inrange(grdatn, 40, 42) & year>1991
  replace educ = 4 if grdatn==43 & year>1991
  replace educ = 5 if grdatn>=44 & grdatn<. & year>1991
label var educ "Educational attainment"
label define educ ///
	1 "Less than high school degree" ///
	2 "High school degree" ///
	3 "Some college" ///
	4 "College degree" ///
	5 "Graduate degree"
label values educ educ

** State economic condition quadratics
** Squares and cubes of the unemployment rate (urate) and of d3lne (employment),
** created in the order urate2 urate3 d3lne2 d3lne3
foreach v in urate d3lne {
  forvalues p = 2/3 {
    gen `v'`p' = `v'^`p'
  }
}

/* AGE CATEGORIES */
** NOTE(review): relies on byte storage discarding the fractional part of age/10 -- confirm
gen byte agecat = age/10
label variable agecat "Decade of Age"

/* FEMALE AND MARRIED FEMALE */
  ** Married indicator (spouse present).
  ** marstat1 is used wherever it is nonmissing; marstat2 is the fallback;
  ** missing when both are missing.
  ** Note that marstat2 only starts in mid-2012 for some reason
  ** (available in raw data back to 1994)
gen byte married = cond(marstat1<., marstat1==1, cond(marstat2<., marstat2==1, .))
gen byte female = (sex==2)
label variable female "=1 if female"
  ** Product form, so a missing married gives a missing fem_marr
gen byte fem_marr = married*female
label variable fem_marr "=1 if married female"


** Need to form durmoncat2 (from old ext06 program)
/* Discard the rounded duration variables and their value label built earlier */
drop durmon durmoncat
label drop durmoncat

/* MONTHLY DURATION, ROUNDED DOWN */
** duration2 divided by 52/12 and floored, so the shortest durations get 0.
** The recode of duration2==0 to 1 is left commented out below.
gen byte durmon2 = floor(duration2/(52/12))
/*  replace durmon2=1 if duration2==0 */
label variable durmon2 "unemployment duration in months (recoded)"

** To illustrate, display range of duration2 by durmon2
** NOTE(review): c()/cell() look like pre-Stata-17 table options -- confirm the Stata version this runs under
table durmon2, c(min duration2 max duration2) cell(15)

/* GROUPED DURATION CATEGORIES (DURMONCAT>6) */
** Values 7-9, 10-12 and 13-48 collapse to 7, 8 and 9; lower values are left as they are
gen byte durmoncat2 = durmon2
recode durmoncat2 (7/9=7) (10/12=8) (13/48=9)
label define durmoncat 7 "7-9" 8 "10-12" 9 "13-48"
label values durmoncat2 durmoncat
label variable durmoncat2 "monthly unemp dur categories (recoded)"

/* DEFINE SOME MACROS */
/* DROP THE STATE AND DATE DUMMIES */
** Three macros holding covariate lists: dur_FV (factor-variable duration term),
** dur_JR (a list of duration terms) and X (controls). None is referenced again in
** the visible part of this script -- presumably consumed by later programs; confirm.
** NOTE(review): dur_FV names durmoncat2, which is renamed to durmoncat just before
** the save below -- confirm which name downstream code expects.
** NOTE(review): dur, dur_sq, dur_inv, new_un and gte26 are not created in the visible script.
macro define dur_FV i.durmoncat2
macro define dur_JR dur dur_sq dur_inv new_un gte26
#d ;
macro define X urate* d3lne* i.educ i.agecat female married fem_marr
 i.racegrp i.majind2;
#d cr

** Citizen dummy: 1 for every citstat value other than 5, 0 when citstat==5.
** Left missing when citstat is missing; a bare "citstat != 5" is true for
** missing values and would code unknown status as citizen.
gen citizen = citstat != 5 if citstat < .

** Potential misclassification flag:
** laborstatus3==2 together with whyabs2==14, in month 4 or later
gen misclassified_flag = (laborstatus3 == 2) & (whyabs2 == 14) & (month >= 4)

** Peridnum (for ASEC merge)
** Convert the three id parts to zero-padded strings (15, 5 and 2 digits),
** then concatenate them: hhid + hhidtwo -> hhidtest, hhidtest + lineno -> peridnum
tostring hhid, replace format(%015.0f)
tostring hhidtwo, replace format(%05.0f)
tostring lineno, replace format(%02.0f)
egen hhidtest = concat(hhid hhidtwo)
egen peridnum = concat(hhidtest lineno)

** Variables retained in the output file (everything else is dropped)
keep ///
  month mis age agecat female married fem_marr grdatn educ racegrp ///
  lfstat duration duration2 durmon2 durmoncat2 elig elig2 ///
  citstat citizen ernwk_all3 peridnum ///
  lfstat2 exit exit2 exit_emp exit2_emp exit_nilf exit2_nilf ///
  aernush3 E U N ///
  EEb UEb NEb EUb UUb NUb ENb UNb NNb ///
  EEf UEf NEf EUf UUf NUf ENf UNf NNf ///
  fips state st statestr emp emp_c urate urate2 urate3 ///
  lfp epop lf pop d3lne d12lne ///
  wgt wgtbls ernwgt id matchf match2f ///
  fl_sex_cps fl_race_cps fl_educ_cps fl_age_cps fl_bad_cps ///
  lineno hhidtest race6 majind2 majind occmaj3 match ///
  laborstatus3 whyabs2 misclassified_flag utype pwlgwgt

** Final variable names: strip version suffixes and use the short duration names
rename aernush3     aernush
rename citstat      prcitshp
rename ernwk_all3   ernwk
rename durmon2      durmon
rename durmoncat2   durmoncat
rename occmaj3      majocc
rename laborstatus3 laborstatus
rename whyabs2      whyabs

** Compress again before saving
** Output path is relative to the working directory set at the top of the script
compress
save "Intermediate/cps_match.dta", replace
